*early career earnings growth in NLSY79 and NLSY97
* Loads the NLSY79 learning panel, appends the NLSY97 panel, tags every row
* with its source cohort, and builds a person id that is unique across cohorts.
* Set the local macro dir to the folder holding the .dta files (no trailing slash).
local dir "YOUR PATH HERE"
* Forward slashes are used as the separator because Stata accepts them on
* Windows, macOS and Unix, whereas backslash-separated paths only resolve on Windows.
use "`dir'/nlsy79_learning_panel.dta", clear
append using "`dir'/nlsy97_learning_panel.dta"
* sample: 1 for rows flagged nlsy79==1, 2 for rows flagged nlsy97==1, missing otherwise
gen sample=1 if nlsy79==1
replace sample=2 if nlsy97==1
drop if year==2020
*create unique id
* egen group() leaves uid missing where id is missing; those rows are dropped right after
egen uid=group(sample id)
drop if id==.
*fix race
* Put the race code on one scale for all cohorts: in sample 1 codes 1 and 3 are
* kept as they are, in samples 2/3 codes 1 and 3 are swapped, and code 2 is
* unchanged everywhere. Rows matching none of the rules end up with race missing.
* The three assignments are mutually exclusive, so their order does not matter.
rename race race_old
gen race=.
replace race=1 if (race_old==1 & sample==1) | (race_old==3 & inlist(sample,2,3))
replace race=2 if race_old==2
replace race=3 if (race_old==3 & sample==1) | (race_old==1 & inlist(sample,2,3))
* discard the raw race code, occ, and the two cohort flags
drop race_old occ nlsy97 nlsy79
rename iq_std iq
*trim wages
* Clamp wages to the range [3, 200]: positive wages below 3 are raised to 3 and
* wages above 200 are lowered to 200. Zero or negative wages are left untouched.
replace wage=3 if wage>0 & wage<3
* missing() is used instead of wage!=. because extended missing codes (.a-.z)
* sort above every number and are not equal to ".", so a bare wage!=. test
* would let them through and recode them to 200.
replace wage=200 if wage>200 & !missing(wage)
* logs are missing wherever the argument is missing or not positive
gen ln_wage=ln(wage)
gen ln_incwage=ln(incwage)
*use occ1990dd to make comparable across many years
* The onet98 file is looked up in the local macro occdir. Nothing in this
* do-file defines that macro, so fall back to dir when it is empty; otherwise
* the path would start at the drive root and the merge would fail.
if "`occdir'"=="" {
	local occdir "`dir'"
}
* keep(master match) mirrors the two routine merges below: occupation codes that
* appear only in the using file are not added as empty person-less rows.
merge m:1 occ1990dd using "`occdir'/onet98_occ1990dd.dta", keep(master match) nogen
*routine task percentile is normed to 1980. Norm to 2017 and then merge in
merge m:1 occ1990dd using "`dir'/routine_convert_2017.dta", keep(master match) nogen

*merge also for first occ
* The using file is keyed on occ1990dd, so park the current-occupation columns
* under temporary names, let first_occ pose as the key, merge, then undo the renames.
rename occ1990dd occ_temp
rename routine_2017 routine_temp

rename first_occ occ1990dd
merge m:1 occ1990dd using "`dir'/routine_convert_2017.dta", keep(master match) nogen
* routine_2017 now refers to the first occupation; store it under its own name
rename routine_2017 routine_firstocc
rename routine_temp routine_2017
rename occ1990dd first_occ
rename occ_temp occ1990dd

* Education-group indicators: exactly 12 years, 13-15 years, 16-20 years.
* A missing educ yields 0 in all three.
gen hs_deg=educ==12
gen some_coll=inrange(educ,13,15)
gen ba_deg=inrange(educ,16,20)
egen race_gender=group(race male)
*potential experience = age - educ - 6, left missing below age 19 and floored at 0
gen pot_exp=age-educ-6 if age>=19
replace pot_exp=0 if pot_exp<0
*cap at 39 years for 1979 cohort because 2018-1979=39
*cap at 23 years for 1997 cohort because 2019-1997=22, round up to 23 to create 2 year bins for consistency
replace pot_exp=39 if sample==1 & pot_exp>39 & pot_exp!=.
replace pot_exp=23 if sample==2 & pot_exp>23 & pot_exp!=.
* two-year potential-experience bins: potexp_0to1, potexp_2to3, ..., potexp_38to39
forvalues lo=0(2)38 {
	local hi=`lo'+1
	gen potexp_`lo'to`hi'=inrange(pot_exp,`lo',`hi')
}
* BA x bin interactions; built in a second pass so that the potexp_* columns
* (and the ba_potexp_* columns) stay contiguous for later varlist ranges
forvalues lo=0(2)38 {
	local hi=`lo'+1
	gen ba_potexp_`lo'to`hi'=ba_deg*potexp_`lo'to`hi'
}
*actual work experience - can use past calendar year (pcy) or since last interview (sli)
* Each measure is a cumulative total minus wkexp_yr_youth, rounded to the
* nearest integer, together with its square:
*   work_exp   <- wkexp_yr_total      max_exp    <- maxexp_yr_total
*   sli_exp    <- wkexp_sli_total     maxsli_exp <- maxexp_sli_total
* Caps at 39 (sample 1) and 23 (sample 2) were commented out in the original
* and remain disabled here; the template is kept for reference:
*   replace X=39 if X>39 & X!=. & sample==1
*   replace X=23 if X>23 & X!=. & sample==2
local totals "wkexp_yr_total maxexp_yr_total wkexp_sli_total maxexp_sli_total"
local i=0
foreach out in work_exp max_exp sli_exp maxsli_exp {
	local ++i
	local tot : word `i' of `totals'
	gen `out'=`tot'-wkexp_yr_youth
	replace `out'=round(`out')
	gen `out'2=`out'^2
}
gen age2=age^2
*Returns by age
* two-year age bins: age_19to20, age_21to22, ..., age_59to60
forvalues lo=19(2)59 {
	local hi=`lo'+1
	gen age_`lo'to`hi'=inrange(age,`lo',`hi')
}
* Interactions of each bin from age_21to22 onward with BA, IQ, BA x IQ and the
* routine score. One full pass per prefix keeps every family of columns
* contiguous, which the varlist ranges in the regressions rely on.
local mult_ba "ba_deg"
local mult_iq "iq"
local mult_ba_iq "ba_deg*iq"
local mult_routine "routine_2017"
foreach p in ba iq ba_iq routine {
	forvalues lo=21(2)59 {
		local hi=`lo'+1
		gen `p'_age_`lo'to`hi'=`mult_`p''*age_`lo'to`hi'
	}
}
* Two-year bins (0-1, 2-3, ..., 42-43) for the four actual-experience measures:
*   workexp_*   from work_exp      maxexp_*    from max_exp
*   sliexp_*    from sli_exp       maxsliexp_* from maxsli_exp
* The indicators are left missing when the underlying measure is missing. A
* plain ">= & <=" test would code such rows as 0 in every bin, which silently
* places them in the omitted category of any regression that does not also
* restrict on the experience variable (the Figure A11 models). The occ_tenure
* indicators in this file are blanked the same way.
foreach m in work max sli maxsli {
	forvalues lo=0(2)42 {
		local hi=`lo'+1
		gen `m'exp_`lo'to`hi'=inrange(`m'_exp,`lo',`hi') if !missing(`m'_exp)
	}
}
* BA x bin interactions for every measure (missing where the bin is missing).
* Built in a separate pass so each family of columns stays contiguous for the
* varlist ranges used in the regressions.
foreach m in work max sli maxsli {
	forvalues lo=0(2)42 {
		local hi=`lo'+1
		gen ba_`m'exp_`lo'to`hi'=ba_deg*`m'exp_`lo'to`hi'
	}
}
* IQ x bin and routine-score x bin interactions, work_exp bins only
forvalues lo=0(2)42 {
	local hi=`lo'+1
	gen iq_workexp_`lo'to`hi'=iq*workexp_`lo'to`hi'
}
forvalues lo=0(2)42 {
	local hi=`lo'+1
	gen routine_workexp_`lo'to`hi'=routine_2017*workexp_`lo'to`hi'
}
*Create categorical variables for occ tenure
* occ_tenure_0 flags zero tenure, occ_tenure_1to2 ... occ_tenure_19to20 flag
* two-year bands, and occ_tenure_g20 flags tenure above 20 and up to 25.
gen occ_tenure_0=occ_tenure==0
forvalues lo=1(2)19 {
	local hi=`lo'+1
	gen occ_tenure_`lo'to`hi'=inrange(occ_tenure,`lo',`hi')
}
gen occ_tenure_g20=occ_tenure>20 & occ_tenure<=25
* blank the indicators where tenure itself is missing (they would otherwise be 0)
foreach var of varlist occ_tenure_* {
	replace `var'=. if occ_tenure==.
}

*interactions
local bins "0 1to2 3to4 5to6 7to8 9to10 11to12 13to14 15to16 17to18 19to20 g20"
* first order: BA, IQ and routine score, each times every tenure indicator
local src_ba "ba_deg"
local src_iq "iq"
local src_routine "routine_2017"
foreach p in ba iq routine {
	foreach b of local bins {
		gen `p'_`b'=`src_`p''*occ_tenure_`b'
	}
}
* second order: the BA and IQ tenure interactions times the routine score
foreach p in ba iq {
	foreach b of local bins {
		gen `p'_routine_`b'=`p'_`b'*routine_2017
	}
}

*create occupation categories
* Four groups defined on occ1990dd code ranges. Codes listed in superv are
* counted as mgmt and excluded from white and blue.
gen mgmt=inrange(occ1990dd,4,22)
gen superv=1 if inlist(occ1990dd,243,303,433,448,450,470,503,558,628,803,823) | inrange(occ1990dd,413,415)
replace mgmt=1 if superv==1
gen prof=inrange(occ1990dd,23,235)
gen white=inrange(occ1990dd,243,389) & superv!=1
gen blue=inrange(occ1990dd,405,889) & superv!=1
* no occupation code -> no category (instead of 0)
foreach v of varlist mgmt prof white blue {
	replace `v'=. if occ1990dd==.
}
* two broad groups, again missing where the occupation code is missing
gen mgmt_prof=(mgmt==1 | prof==1)
gen white_blue=(white==1 | blue==1)
foreach v of varlist mgmt_prof white_blue {
	replace `v'=. if occ1990dd==.
}
* broad group x occupation-tenure indicator
local bins "0 1to2 3to4 5to6 7to8 9to10 11to12 13to14 15to16 17to18 19to20 g20"
foreach grp in mgmt_prof white_blue {
	foreach b of local bins {
		gen `grp'_`b'=`grp'*occ_tenure_`b'
	}
}

* declare the panel: uid is the person identifier
xtset uid

*Figure 1 - compare benchmark Mincer model to time-varying college premium by experience

*1) mincer model; 2) potential experience 3) actual experience in panel format; 4) mincer + interaction
*restrict to work experience of 34-35 years or less, because potential experience for grad degrees (educ=20) caps at 60-20-6=34*
local ctrl "i.year i.age i.race_gender"
local s79 "age>=22 & age<=60 & sample==1"
reg ln_wage ba_deg work_exp work_exp2 `ctrl' if `s79' & work_exp<=35, vce(cluster uid)
xi: reg ln_wage ba_potexp_2to3-ba_potexp_34to35 potexp_2to3-potexp_34to35 i.year i.race_gender if `s79' & pot_exp<=35, vce(cluster uid)
xi: xtreg ln_wage ba_workexp_2to3-ba_workexp_34to35 workexp_2to3-workexp_34to35 if `s79' & work_exp<=35, fe vce(cluster uid)
*bottom line - ba*work_exp interaction fits much better
gen ba_wkexp=ba_deg*work_exp
reg ln_wage ba_wkexp ba_deg work_exp `ctrl' if `s79' & work_exp<=35, vce(cluster uid)

*Figure 2 - Figure 1, but in NLSY97
* same four models on sample 2, ages 22-38, experience of 17 years or less
local s97 "age>=22 & age<=38 & sample==2"
reg ln_wage ba_deg work_exp work_exp2 `ctrl' if `s97' & work_exp<=17, vce(cluster uid)
xi: reg ln_wage ba_potexp_2to3-ba_potexp_22to23 potexp_2to3-potexp_22to23 i.year i.race_gender if `s97' & pot_exp<=17, vce(cluster uid)
xi: xtreg ln_wage ba_workexp_2to3-ba_workexp_22to23 workexp_2to3-workexp_22to23 if `s97' & work_exp<=17, fe vce(cluster uid)
reg ln_wage ba_wkexp ba_deg work_exp `ctrl' if `s97' & work_exp<=17, vce(cluster uid)


*Figure 3 - wage growth in levels, by education
*Calculate annual percent change and smooth with a five-year moving average
* (that post-processing is not done in this script; only the fixed-effects
* regressions are run here)
* estimation samples for the three education groups, NLSY79 only
local hs "age>=18 & age<=60 & sample==1 & educ>=1 & educ<=12"
local sc "age>=20 & age<=60 & sample==1 & educ>=13 & educ<=15"
local ba "age>=22 & age<=60 & sample==1 & educ>=16 & educ<=20"
* wage level on age bins; the first included bin starts one year above each group's minimum age
xi: xtreg wage age_19to20-age_59to60 if `hs', fe vce(cluster uid)
xi: xtreg wage age_21to22-age_59to60 if `sc', fe vce(cluster uid)
xi: xtreg wage age_23to24-age_59to60 if `ba', fe vce(cluster uid)

* wage level on actual-experience bins, same three samples, experience of 39 years or less
foreach g in hs sc ba {
	xi: xtreg wage workexp_2to3-workexp_38to39 if ``g'' & work_exp<=39, fe vce(cluster uid)
}

*Figures 4-6 and Table 1 are in the job panel file

*Figure 7 - returns to tenure by job category
* person fixed effects; tenure indicators alone and interacted with the two broad occupation groups
local tenure_terms "mgmt_prof_1to2-mgmt_prof_g20 white_blue_1to2-white_blue_g20 occ_tenure_1to2-occ_tenure_g20"
local fe_ctrl "i.occ1990dd i.emp_tenure i.work_exp i.age"
xi: xtreg ln_wage `tenure_terms' `fe_ctrl' if age>=19 & age<=60 & occ_tenure<=25, fe vce(cluster uid)

*Figure 8 - how well does first job predict the college wage premium?
*Second line is in CPS projection file
local age_terms "ba_age_25to26-ba_age_59to60 age_25to26-age_59to60"
xi: xtreg ln_wage `age_terms' if age>=23 & age<=60 & sample==1, fe vce(cluster uid)



*Appendix Results

* education-group conditions shared by Figures A1 and A2
local e_hs "educ>=1 & educ<=12"
local e_sc "educ>=13 & educ<=15"
local e_ba "educ>=16 & educ<=20"

*Appendix Figure A1 - plot actual work experience by age and education - add constant to get coefficients
foreach g in hs sc ba {
	xi: xtreg wkexp_yr age_21to22-age_59to60 if age>=19 & age<=60 & sample==1 & `e_`g'', fe vce(cluster uid)
}

*Appendix Figure A2 - estimate total accumulated work experience
*Subtract from potential experience, which is just age minus years of education (assume 12, 14, and 16 respectively) minus 6
foreach g in hs sc ba {
	xi: xtreg wkexp_yr_total age_21to22-age_59to60 if age>=18 & age<=60 & sample==1 & `e_`g'', fe vce(cluster uid)
}


*Figure A3 - compare wage growth by age and experience
*both lines start with the constant as year zero
local all79 "age>=18 & age<=60 & sample==1"
xi: xtreg wage age_19to20-age_59to60 if `all79', fe vce(cluster uid)
xi: xtreg wage workexp_2to3-workexp_38to39 if `all79' & work_exp<=39, fe vce(cluster uid)


*Figure A4 - show returns to ability and education by age
* second model adds the IQ x age-bin interactions
local ba79 "ba_age_23to24-ba_age_59to60"
local iq79 "iq_age_23to24-iq_age_59to60"
local ag79 "age_23to24-age_59to60"
xi: xtreg ln_wage `ba79' `ag79' if sample==1 & age>=21, fe vce(cluster uid)
xi: xtreg ln_wage `ba79' `iq79' `ag79' if sample==1 & age>=21, fe vce(cluster uid)

*Appendix Figure A5 - NLSY97
* same two models on sample 2, ages 21-38, bins through 37-38
local ba97 "ba_age_23to24-ba_age_37to38"
local iq97 "iq_age_23to24-iq_age_37to38"
local ag97 "age_23to24-age_37to38"
xi: xtreg ln_wage `ba97' `ag97' if sample==2 & age>=21 & age<=38, fe vce(cluster uid)
xi: xtreg ln_wage `ba97' `iq97' `ag97' if sample==2 & age>=21 & age<=38, fe vce(cluster uid)

*Appendix Figures A6-A8 and Tables A1-A4 are in the job panel file

* pieces shared by the tenure models below
local ten "occ_tenure_1to2-occ_tenure_g20"
local grp_ten "mgmt_prof_1to2-mgmt_prof_g20 white_blue_1to2-white_blue_g20"
local rt "routine_1to2-routine_g20"
local fe_ctrl "i.occ1990dd i.emp_tenure i.work_exp i.age"
local ten_samp "age>=19 & age<=60 & occ_tenure<=25"

*Figure A9 - Returns to tenure by job category, by education
*A9A - high school or less
xi: xtreg ln_wage `grp_ten' `ten' `fe_ctrl' if `ten_samp' & educ>=1 & educ<=12, fe vce(cluster uid)
*A9B - BA or more
xi: xtreg ln_wage `grp_ten' `ten' `fe_ctrl' if `ten_samp' & educ>=16 & educ<=20, fe vce(cluster uid)

*Figure A10 and Table A5 - lower return to tenure in routine jobs
* pooled models absorb occupation; each pair is run without and then with the routine x tenure terms
local pooled_ctrl "iq i.educ i.emp_tenure i.work_exp i.age i.year i.sample i.race_gender"
xi: areg ln_wage `ten' `pooled_ctrl' if `ten_samp', absorb(occ1990dd) vce(cluster uid)
xi: areg ln_wage `rt' `ten' `pooled_ctrl' if `ten_samp', absorb(occ1990dd) vce(cluster uid)
* person fixed-effects versions
xi: xtreg ln_wage `ten' `fe_ctrl' if `ten_samp', fe vce(cluster uid)
xi: xtreg ln_wage `rt' `ten' `fe_ctrl' if `ten_samp', fe vce(cluster uid)

*Figure A11 - How does occupational sorting affect the college wage premium?
* one fixed effect per occupation x occupation-tenure cell
egen group_FE=group(occ1990dd occ_tenure)
local ba_terms "ba_workexp_0to1-ba_workexp_38to39"
local iq_terms "iq_workexp_0to1-iq_workexp_38to39"
local rest "workexp_2to3-workexp_38to39 i.age i.year i.race_gender"
local s79 "age>=22 & age<=60 & sample==1"
xi: reg ln_wage `ba_terms' `rest' if `s79', vce(cluster uid)
*add occupation by tenure FE
xi: areg ln_wage `ba_terms' `rest' if `s79', absorb(group_FE) vce(cluster uid)
*Add IQ controls
xi: reg ln_wage `ba_terms' `iq_terms' `rest' if `s79', vce(cluster uid)
xi: areg ln_wage `ba_terms' `iq_terms' `rest' if `s79', absorb(group_FE) vce(cluster uid)
